rm(list=ls())
library(tidyverse)
library(readxl)

# Read and structure mun urban 2000 and 2010
file <- list.files(here::here("data","raw","IBGE"), pattern = "tab1309_mun_urban_vs_rural", full.names = T)

urban_2000_1 <- read_excel(file,
                      sheet = "Tabela 1",
                      skip = 4,
                      col_names = c("mun_code","mun_name","var_desc","sex","urb_perc"),
                      col_types = c(rep("text",4),"numeric"),
                      na = "...") %>% 
    filter(!is.na(mun_name)) %>% 
    select(-sex,-var_desc)
  
urban_2000_2 <- read_excel(file,
                           sheet = "Tabela 2",
                           skip = 4,
                           col_names = c("mun_code","mun_name","var_desc","sex","urb_perc"),
                           col_types = c(rep("text",4),"numeric"),
                           na = "...") %>% 
  filter(!is.na(mun_name)) %>% 
  select(-sex,-var_desc)

urban_2000_3 <- read_excel(file,
                           sheet = "Tabela 3",
                           skip = 4,
                           col_names = c("mun_code","mun_name","var_desc","sex","urb_perc"),
                           col_types = c(rep("text",4),"numeric"),
                           na = "...") %>% 
  filter(!is.na(mun_name)) %>% 
  select(-sex,-var_desc)

urban_2000 <- bind_rows(urban_2000_1,urban_2000_2,urban_2000_3) %>% 
  mutate(census_year = 2000)
         

urban_2010_1 <- read_excel(file,
                           sheet = "Tabela 4",
                           skip = 4,
                           col_names = c("mun_code","mun_name","var_desc","sex","urb_perc"),
                           col_types = c(rep("text",4),"numeric"),
                           na = "...") %>% 
  filter(!is.na(mun_name)) %>% 
  select(-sex,-var_desc)

urban_2010_2 <- read_excel(file,
                           sheet = "Tabela 5",
                           skip = 4,
                           col_names = c("mun_code","mun_name","var_desc","sex","urb_perc"),
                           col_types = c(rep("text",4),"numeric"),
                           na = "...") %>% 
  filter(!is.na(mun_name)) %>% 
  select(-sex,-var_desc)

urban_2010_3 <- read_excel(file,
                           sheet = "Tabela 6",
                           skip = 4,
                           col_names = c("mun_code","mun_name","var_desc","sex","urb_perc"),
                           col_types = c(rep("text",4),"numeric"),
                           na = "...") %>% 
  filter(!is.na(mun_name)) %>% 
  select(-sex,-var_desc)

urban_2010 <- bind_rows(urban_2010_1,urban_2010_2,urban_2010_3) %>% 
  mutate(census_year = 2010)

urban_mun <- bind_rows(urban_2000,urban_2010) %>% 
  group_by(mun_code,mun_name,census_year) %>% 
  summarise(urb_perc = sum(urb_perc, na.rm = T)) %>% 
  ungroup() %>% 
  mutate(mun_code = str_sub(mun_code,1,-2))


# SaveRdS ----

write_rds(urban_mun,here::here("data","processed","citycharacteristics","urban_mun.rds"))
